{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fictitious Names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Introduction:\n",
    "\n",
    "This time you will create a data again \n",
    "\n",
    "Special thanks to [Chris Albon](http://chrisalbon.com/) for sharing the dataset and materials.\n",
    "All the credits to this exercise belongs to him.  \n",
    "\n",
    "In order to understand about it go to [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/).\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Create the 3 DataFrames based on the following raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "raw_data_1 = {\n",
    "        'subject_id': ['1', '2', '3', '4', '5'],\n",
    "        'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'], \n",
    "        'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}\n",
    "\n",
    "raw_data_2 = {\n",
    "        'subject_id': ['4', '5', '6', '7', '8'],\n",
    "        'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'], \n",
    "        'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}\n",
    "\n",
    "raw_data_3 = {\n",
    "        'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],\n",
    "        'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign each to a variable called data1, data2, data3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>test_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>7</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>11</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id  test_id\n",
       "0          1       51\n",
       "1          2       15\n",
       "2          3       15\n",
       "3          4       61\n",
       "4          5       16\n",
       "5          7       14\n",
       "6          8       15\n",
       "7          9        1\n",
       "8         10       61\n",
       "9         11       16"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data1 = pd.DataFrame(raw_data_1, columns = ['subject_id', 'first_name', 'last_name'])\n",
    "data2 = pd.DataFrame(raw_data_2, columns = ['subject_id', 'first_name', 'last_name'])\n",
    "data3 = pd.DataFrame(raw_data_3, columns = ['subject_id','test_id'])\n",
    "\n",
    "data3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. Join the two dataframes along rows and assign all_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Alex</td>\n",
       "      <td>Anderson</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Amy</td>\n",
       "      <td>Ackerman</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Allen</td>\n",
       "      <td>Ali</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Alice</td>\n",
       "      <td>Aoni</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Ayoung</td>\n",
       "      <td>Atiches</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>Billy</td>\n",
       "      <td>Bonder</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>Brian</td>\n",
       "      <td>Black</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6</td>\n",
       "      <td>Bran</td>\n",
       "      <td>Balwner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7</td>\n",
       "      <td>Bryce</td>\n",
       "      <td>Brice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8</td>\n",
       "      <td>Betty</td>\n",
       "      <td>Btisan</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id first_name last_name\n",
       "0          1       Alex  Anderson\n",
       "1          2        Amy  Ackerman\n",
       "2          3      Allen       Ali\n",
       "3          4      Alice      Aoni\n",
       "4          5     Ayoung   Atiches\n",
       "0          4      Billy    Bonder\n",
       "1          5      Brian     Black\n",
       "2          6       Bran   Balwner\n",
       "3          7      Bryce     Brice\n",
       "4          8      Betty    Btisan"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_data = pd.concat([data1, data2])\n",
    "all_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. Join the two dataframes along columns and assing to all_data_col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>subject_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Alex</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>4</td>\n",
       "      <td>Billy</td>\n",
       "      <td>Bonder</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Amy</td>\n",
       "      <td>Ackerman</td>\n",
       "      <td>5</td>\n",
       "      <td>Brian</td>\n",
       "      <td>Black</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Allen</td>\n",
       "      <td>Ali</td>\n",
       "      <td>6</td>\n",
       "      <td>Bran</td>\n",
       "      <td>Balwner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Alice</td>\n",
       "      <td>Aoni</td>\n",
       "      <td>7</td>\n",
       "      <td>Bryce</td>\n",
       "      <td>Brice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Ayoung</td>\n",
       "      <td>Atiches</td>\n",
       "      <td>8</td>\n",
       "      <td>Betty</td>\n",
       "      <td>Btisan</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id first_name last_name subject_id first_name last_name\n",
       "0          1       Alex  Anderson          4      Billy    Bonder\n",
       "1          2        Amy  Ackerman          5      Brian     Black\n",
       "2          3      Allen       Ali          6       Bran   Balwner\n",
       "3          4      Alice      Aoni          7      Bryce     Brice\n",
       "4          5     Ayoung   Atiches          8      Betty    Btisan"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_data_col = pd.concat([data1, data2], axis = 1)\n",
    "all_data_col"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. Print data3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>test_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>7</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>10</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>11</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id  test_id\n",
       "0          1       51\n",
       "1          2       15\n",
       "2          3       15\n",
       "3          4       61\n",
       "4          5       16\n",
       "5          7       14\n",
       "6          8       15\n",
       "7          9        1\n",
       "8         10       61\n",
       "9         11       16"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. Merge all_data and data3 along the subject_id value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>test_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Alex</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Amy</td>\n",
       "      <td>Ackerman</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Allen</td>\n",
       "      <td>Ali</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Alice</td>\n",
       "      <td>Aoni</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Billy</td>\n",
       "      <td>Bonder</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>Ayoung</td>\n",
       "      <td>Atiches</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>5</td>\n",
       "      <td>Brian</td>\n",
       "      <td>Black</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>Bryce</td>\n",
       "      <td>Brice</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>Betty</td>\n",
       "      <td>Btisan</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id first_name last_name  test_id\n",
       "0          1       Alex  Anderson       51\n",
       "1          2        Amy  Ackerman       15\n",
       "2          3      Allen       Ali       15\n",
       "3          4      Alice      Aoni       61\n",
       "4          4      Billy    Bonder       61\n",
       "5          5     Ayoung   Atiches       16\n",
       "6          5      Brian     Black       16\n",
       "7          7      Bryce     Brice       14\n",
       "8          8      Betty    Btisan       15"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(all_data, data3, on='subject_id')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8. Merge only the data that has the same 'subject_id' on both data1 and data2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>first_name_x</th>\n",
       "      <th>last_name_x</th>\n",
       "      <th>first_name_y</th>\n",
       "      <th>last_name_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>Alice</td>\n",
       "      <td>Aoni</td>\n",
       "      <td>Billy</td>\n",
       "      <td>Bonder</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5</td>\n",
       "      <td>Ayoung</td>\n",
       "      <td>Atiches</td>\n",
       "      <td>Brian</td>\n",
       "      <td>Black</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id first_name_x last_name_x first_name_y last_name_y\n",
       "0          4        Alice        Aoni        Billy      Bonder\n",
       "1          5       Ayoung     Atiches        Brian       Black"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(data1, data2, on='subject_id', how='inner')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 9. Merge all values in data1 and data2, with matching records from both sides where available."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>first_name_x</th>\n",
       "      <th>last_name_x</th>\n",
       "      <th>first_name_y</th>\n",
       "      <th>last_name_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Alex</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Amy</td>\n",
       "      <td>Ackerman</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Allen</td>\n",
       "      <td>Ali</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Alice</td>\n",
       "      <td>Aoni</td>\n",
       "      <td>Billy</td>\n",
       "      <td>Bonder</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Ayoung</td>\n",
       "      <td>Atiches</td>\n",
       "      <td>Brian</td>\n",
       "      <td>Black</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Bran</td>\n",
       "      <td>Balwner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Bryce</td>\n",
       "      <td>Brice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Betty</td>\n",
       "      <td>Btisan</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject_id first_name_x last_name_x first_name_y last_name_y\n",
       "0          1         Alex    Anderson          NaN         NaN\n",
       "1          2          Amy    Ackerman          NaN         NaN\n",
       "2          3        Allen         Ali          NaN         NaN\n",
       "3          4        Alice        Aoni        Billy      Bonder\n",
       "4          5       Ayoung     Atiches        Brian       Black\n",
       "5          6          NaN         NaN         Bran     Balwner\n",
       "6          7          NaN         NaN        Bryce       Brice\n",
       "7          8          NaN         NaN        Betty      Btisan"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(data1, data2, on='subject_id', how='outer')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
